This file documents our reanalysis of the dataset used to examine biodiversity and conflict relationships in the Southern Philippines presented in this paper https://doi.org/10.1038/s44185-024-00044-8.\ It should be noted that the analysis below does not take spatial autocorrelation into account; we address this in Supplementary FS3.
Files needed
##1. "landcover" folder containing the raster files of 1988 and 2020 land cover types
##2. "mobios_tab.csv" or the biodiversity data
##3. "conflict_tab_89-21.csv" or the conflict data
##4. "1988_lc_classmap.csv" 1988 land cover types code
##5. "2020_lc_classmap.csv" 2020 land cover types code
Load required packages
## make sure to install all packages including required dependencies prior to use
re.lib <- c("tidyverse", "dplyr", "ggplot2", "patchwork",
"sf", "terra", "raster", "lme4", "MASS", "piecewiseSEM")
## check if libraries are installed, install them if not
for (lib in re.lib) {
if (!requireNamespace(lib, quietly = TRUE)) {
install.packages(lib)
}
library(lib, character.only = TRUE)
}
Prepare datasets
## biodiversity data obtained from https://doi.org/10.15468/rtedgk
## conflict data obtained from https://data.humdata.org/dataset/ucdp-data-for-philippines?
## note that only a subset of this dataset (i.e., those from Mindanao and adjacent islands) was used
## biodiversity data
bio <- read.csv("data/mobios_tab.csv", header = TRUE, sep = ",")
## subset data
bio.sub <- subset(bio, select = c(12, 23, 20, 15, 16))
bio.sub <- bio.sub %>% filter(county!="") # remove rows with no county information
bio.sub <- bio.sub %>% filter(class!="Malacostraca") # exclude this taxon
bio.sub <- bio.sub %>% filter(class!="Bivalvia") # exclude this taxon
bio.sub <- bio.sub %>% filter(class!="Gastropoda")# exclude this taxon
## check province name
## there are rows where the name of the province is uncertain as indicated by "/"
## "Zamboanga del Norte" was changed to "Zamboanga del Norte" prior to import of data
unique(bio.sub$county)
## [1] "Lanao del Norte"
## [2] "South Cotabato"
## [3] "North Cotabato"
## [4] "Camiguin Island"
## [5] "Maguindanao"
## [6] "Agusan del Sur"
## [7] "Bukidnon"
## [8] "Davao Oriental"
## [9] "Davao"
## [10] "Misamis Occidental"
## [11] "Surigao del Sur"
## [12] "Davao del sur"
## [13] "Lanao del Sur"
## [14] "Sarangani"
## [15] "Misamis Oriental"
## [16] "Surigao del Norte"
## [17] "Davao de Oro"
## [18] "Davao del Norte"
## [19] "Zamboanga del Sur"
## [20] "Surigao del Norte/Agusan del Sur"
## [21] "Dinagat Island"
## [22] "Basilan"
## [23] "Tawi-Tawi"
## [24] "Sulu"
## [25] "Sultan Kudarat"
## [26] "Agusan del Norte"
## [27] "Cagayan de Oro"
## [28] "Bukidnon/Camiguin"
## [29] "Bukidnon/Misamis Oriental/Iligan"
## [30] "Zamboanga del Norte"
## [31] "Zamboanga Sibugay"
## [32] "Zamboanga del Norte"
## [33] "Misamis Occidental/Misamis Oriental/Camiguin/Bukidnon"
## [34] "Zamboanga City"
## [35] "General Santos"
## [36] "North Cotabato/Davao City"
## remove uncertain province names
bio.sub <- bio.sub %>% filter(!grepl('/', county))
head(bio.sub)
## write to a csv
write.csv(bio.sub, "bio.sub.csv")
## conflict data
con <- read.csv("data/conflict_tab_89-21.csv", header = TRUE, sep = ",")
con.sub <- subset(con, select = c(1,3,15,18,30,32,33))
head(con.sub)
## write to a csv
write.csv(con.sub, "con.sub.csv")
Count recorded species per taxon in each point and within province
## the procedures for conducting species counts in each site were not mentioned in the paper
## according to the paper, analysis was done at the provincial level
## there could be two ways that species counts were done (see image below)
## this part is extremely important, which the authors failed to discuss
## the left panel of the figure shows that species counts were aggregated at the provincial level
## each point (within the province) receives the same value of species count (or species richness)
## the right panel shows that each point has a unique number of species count
## note that these values are crucial for other downstream analyses
## particularly when relationships of species counts
## and distance to the nearest conflict sites are considered
## we generated the values for these two scenarios below
## and test whether any of them would match the results presented in the paper
knitr::include_graphics("images/bioconflict.png")
## count unique records (i.e., species richness) per point
bio.data.sum <- bio.sub %>%
group_by(county, class, decimalLatitude, decimalLongitude) %>%
mutate(NumbSp = n_distinct(scientificName))
head(bio.data.sum)
## write to a csv
write.csv(bio.data.sum, "bio.data.sum.csv")
## count species records per province (this lumps all the data,
## thus each point will have a single species richness value)
prov.bio.data.sum <- bio.sub %>%
group_by(county, class) %>%
mutate(NumbSp = n_distinct(scientificName))
head(prov.bio.data.sum)
## write to a csv
write.csv(prov.bio.data.sum, "prov.data.sum.csv")
Calculate distance of species record to the nearest conflict sites using QGIS
## in QGIS, import the "bio.data.sum.csv" and "con.sub.csv" from R as delimited text files.
## export these files as GeoPackage or Shapefile to convert the data to meters prior to calculation
## use the CRS UTM Zone 51N
knitr::include_graphics("images/qgis_geo.png")
knitr::include_graphics("images/qgis_utm.png")
## import back the newly saved data to QGIS
## get the distance of nearest conflict areas using the "Joint attributes by nearest" plugin
## QGIS > Processing Toolbox > Join attributes by nearest
knitr::include_graphics("images/qgis_join.png")
## after calculating the distance, a new layer will be created.
## the new layer contains the calculated distance in the attribute table
## this also includes the x and y coordinates of the nearest conflict site
## then export this new layer as a csv file so we can use the data in R for other analysis
## repeat the entire process for provincial data (i.e., prov.bio.data.sum.csv) if necessary
## figure below shows the nearest conflict point to species record (black line)
## notice the remainder of conflict points are not included
knitr::include_graphics("images/hub.png")
Calculate average distance
## species richness per point in each province
## read file with calculated distance from QGIS
bio.con.dist <- read.csv("data/bio.con.dist.csv", header = TRUE, sep = ",")
## get average distance by province for each taxon
mean.dist <- bio.con.dist %>%
group_by(county, class, NumbSp, decimalLatitude, decimalLongitude) %>%
summarize(meanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class', 'NumbSp',
## 'decimalLatitude'. You can override using the `.groups` argument.
head(mean.dist)
write.csv(mean.dist, "mean.distance.csv")
## species richness lumped per province
## read file with calculated distance in QGIS
prov.bio.con.dist <- read.csv("data/prov.bio.con.dist.csv", header = TRUE, sep = ",")
## get average distance by province for each taxon
prov.mean.dist <- prov.bio.con.dist %>%
group_by(county, class, NumbSp, decimalLatitude, decimalLongitude) %>%
summarize(meanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class', 'NumbSp',
## 'decimalLatitude'. You can override using the `.groups` argument.
write.csv(prov.mean.dist, "prov.mean.distance.csv")
#get average distance by province without considering the long/lat data (i.e., lumped average)
prov.lmp.dist <- prov.bio.con.dist %>%
group_by(county, class, NumbSp) %>%
summarize(lmpMeanDistance = mean(distance, na.rm = TRUE))
## `summarise()` has grouped output by 'county', 'class'. You can override using
## the `.groups` argument.
write.csv(prov.lmp.dist, "prov.lmp.distance.csv")
Plot species record vs average distance
## get taxa names
key.lab <- unique(mean.dist$class)
## assign point shapes and colors
plot.color <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
point.shape <- c(0,1,2,3,4,5,6,7,8,9,10)
## plot per point species richness vs. mean distance
p1 <- ggplot(mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"),
legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15),
legend.key=element_blank()) +
ylab("Number of species") +
xlab("Average distance (m)") +
labs(title = "Point species richness") +
geom_point(size = 3, stroke = 0.6) +
geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
scale_color_manual(name = "", labels = key.lab, values = plot.color) +
scale_shape_manual(name = "", labels = key.lab, values = point.shape)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## plot lumped species richness vs. mean distance
p2 <- ggplot(prov.mean.dist, (aes(x=meanDistance, y=NumbSp, color=class, shape=class))) + theme_bw() +
theme(axis.text=element_text(size=15), axis.title=element_text(size=15, face="bold"),
legend.key.width = unit(0.8, units = "cm"), legend.text = element_text(size=15),
legend.key=element_blank()) +
ylab("Number of species") +
xlab("Average distance (m)") +
labs(title = "Lumped species richness") +
geom_point(size = 3, stroke = 0.6) +
geom_smooth(method=lm, se=FALSE, fullrange=FALSE, size = 2) +
scale_color_manual(name = "", labels = key.lab, values = plot.color) +
scale_shape_manual(name = "", labels = key.lab, values = point.shape)
## combine plots
comb.plot <- (p1 + p2 +
plot_annotation(title = ""))
comb.plot <- comb.plot +
plot_layout(guides = "collect") &
theme(legend.position = "right")
comb.plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Get number of conflict sites per province
## we are not sure how the frequency of conflicts were scored in the paper as it was not mentioned
## we can get frequency based on the number of unique conflict points within each province
con.sub.2 <- con.sub
con.sub.2 <- con.sub.2 %>% filter(PROVINCE!="") #filter empty rows
con.freq <- con.sub.2 %>%
group_by(PROVINCE) %>%
mutate(ConflicFreq = n_distinct(latitude, longitude)) #use only the unique latitude
con.freq1 <- con.freq %>% distinct(PROVINCE, .keep_all = TRUE)
write.csv(con.freq1, "conflict.frequency.unique.csv")
## or we count each row as a unique report of conflict regardless of coordinates
con.freq2 <- con.sub.2 %>%
group_by(PROVINCE) %>%
count()
write.csv(con.freq2, "conflict.frequency.rows.csv")
ggplot(data=con.freq2, aes(x=reorder(PROVINCE, -n), y=n)) +
geom_bar(stat="identity", fill="#5794db") +
ylab("Frequency") +
xlab("Province") +
labs(title = "Conflict frequency") +
guides(x = guide_axis(angle = 75))
## combine biodiversity and conflict data (lumped)
prov.comb <- prov.lmp.dist %>% mutate(conflictFreq = if_else(county == "Agusan del Norte", 50,
if_else(county == "Agusan del Sur", 85,
if_else(county == "Basilan", 329,
if_else(county == "Bukidnon", 82,
if_else(county == "Cagayan de Oro", 0,
if_else(county == "Camiguin Island", 0,
if_else(county == "Cotabato", 221,
if_else(county == "Davao", 0,
if_else(county == "Davao Oriental", 52,
if_else(county == "Davao de Oro", 113,
if_else(county == "Davao del Norte", 56,
if_else(county == "Davao del sur", 181,
if_else(county == "Dinagat Island", 0,
if_else(county == "General Santos", 0,
if_else(county == "Lanao del Norte", 66,
if_else(county == "Lanao del Sur", 181,
if_else(county == "Maguindanao", 352,
if_else(county == "Misamis Occidental", 15,
if_else(county == "Misamis Oriental", 38,
if_else(county == "North Cotabato", 0,
if_else(county == "Sarangani", 25,
if_else(county == "South Cotabato", 55,
if_else(county == "Sultan Kudarat", 62,
if_else(county == "Sulu", 393,
if_else(county == "Surigao del Norte", 29,
if_else(county == "Surigao del Sur", 72,
if_else(county == "Tawi-Tawi", 17,
if_else(county == "Zamboanga City", 0,
if_else(county == "Zamboanga Sibugay", 18,
if_else(county == "Zamboanga del Norte", 34,
if_else(county == "Zamboanga del Sur", 106, 0))))))))))))))))))))))))))))))))
write.csv(prov.comb, "prov.lmp.comb.csv")
## combine biodiversity and conflict data (per point)
perpoint.comb <- mean.dist %>% mutate(conflictFreq = if_else(county == "Agusan del Norte", 50,
if_else(county == "Agusan del Sur", 85,
if_else(county == "Basilan", 329,
if_else(county == "Bukidnon", 82,
if_else(county == "Cagayan de Oro", 0,
if_else(county == "Camiguin Island", 0,
if_else(county == "Cotabato", 221,
if_else(county == "Davao", 0,
if_else(county == "Davao Oriental", 52,
if_else(county == "Davao de Oro", 113,
if_else(county == "Davao del Norte", 56,
if_else(county == "Davao del sur", 181,
if_else(county == "Dinagat Island", 0,
if_else(county == "General Santos", 0,
if_else(county == "Lanao del Norte", 66,
if_else(county == "Lanao del Sur", 181,
if_else(county == "Maguindanao", 352,
if_else(county == "Misamis Occidental", 15,
if_else(county == "Misamis Oriental", 38,
if_else(county == "North Cotabato", 0,
if_else(county == "Sarangani", 25,
if_else(county == "South Cotabato", 55,
if_else(county == "Sultan Kudarat", 62,
if_else(county == "Sulu", 393,
if_else(county == "Surigao del Norte", 29,
if_else(county == "Surigao del Sur", 72,
if_else(county == "Tawi-Tawi", 17,
if_else(county == "Zamboanga City", 0,
if_else(county == "Zamboanga Sibugay", 18,
if_else(county == "Zamboanga del Norte", 34,
if_else(county == "Zamboanga del Sur", 106, 0))))))))))))))))))))))))))))))))
write.csv(perpoint.comb, "perpoint.comb.csv")
Perform GLM
# set seed
set.seed(12345)
## plot data
p1 <- ggplot(prov.comb, aes(x = lmpMeanDistance, y = NumbSp)) +
geom_point(pch = 1) + geom_smooth(method = lm) +
ylab("Number of species") +
xlab("Average distance (m)") +
labs(title = "Richness vs. average distance")
p2 <- ggplot(prov.comb, aes(x = conflictFreq, y = NumbSp)) +
geom_point(pch = 1) + geom_smooth(method = lm) +
ylab("Number of species") +
xlab("Number of conflict") +
labs(title = "Richness vs. conflict frequency")
## combine plots
comb.plot <- (p1 + p2 +
plot_annotation(title = ""))
comb.plot <- comb.plot +
plot_layout(guides = "collect") &
theme(legend.position = "right")
comb.plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## perform glm using lumped species richness
## assign taxa as a factor
prov.comb$class <- factor(prov.comb$class)
## transform data
prov.comb$distlog <- log(prov.comb$lmpMeanDistance + 1)
prov.comb$freqlog <- log(prov.comb$conflictFreq + 1)
## perform glm
glm.res.1 <- glm(NumbSp ~ freqlog + distlog + class,
family="poisson"(link="log"), data = prov.comb)
glm.res.2 <- glm(NumbSp ~ freqlog + class,
family="poisson"(link="log"), data = prov.comb)
glm.res.3 <- glm(NumbSp ~ distlog + class,
family="poisson"(link="log"), data = prov.comb)
glm.res.4 <- glm(NumbSp ~ freqlog + distlog,
family="poisson"(link="log"), data = prov.comb)
## compare glm results
aic.values <- AIC(glm.res.1, glm.res.2, glm.res.3, glm.res.4)
aic.values
## identify the model with the lowest AIC
best.model <- rownames(aic.values)[which.min(aic.values$AIC)]
best.model
## [1] "glm.res.1"
stepAIC(glm.res.1, direction='both') #use stepAIC function
## Start: AIC=3611.67
## NumbSp ~ freqlog + distlog + class
##
## Df Deviance AIC
## <none> 3019.9 3611.7
## - distlog 1 3031.5 3621.2
## - freqlog 1 3061.7 3651.5
## - class 6 5498.8 6078.6
##
## Call: glm(formula = NumbSp ~ freqlog + distlog + class, family = poisson(link = "log"),
## data = prov.comb)
##
## Coefficients:
## (Intercept) freqlog distlog classAmphibia classArachnida
## 3.53068 -0.04845 -0.04300 0.38063 0.45877
## classAves classInsecta classMammalia classReptilia
## 1.30923 1.73036 -0.23588 0.79056
##
## Degrees of Freedom: 111 Total (i.e. Null); 103 Residual
## Null Deviance: 5683
## Residual Deviance: 3020 AIC: 3612
## get summary for best model
summary(get(best.model))
##
## Call:
## glm(formula = NumbSp ~ freqlog + distlog + class, family = poisson(link = "log"),
## data = prov.comb)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.530676 0.126575 27.894 < 2e-16 ***
## freqlog -0.048453 0.007399 -6.548 5.82e-11 ***
## distlog -0.043000 0.012579 -3.418 0.00063 ***
## classAmphibia 0.380634 0.078973 4.820 1.44e-06 ***
## classArachnida 0.458767 0.076184 6.022 1.72e-09 ***
## classAves 1.309235 0.066414 19.713 < 2e-16 ***
## classInsecta 1.730364 0.063853 27.099 < 2e-16 ***
## classMammalia -0.235879 0.083000 -2.842 0.00448 **
## classReptilia 0.790565 0.072266 10.940 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 5683.2 on 111 degrees of freedom
## Residual deviance: 3019.9 on 103 degrees of freedom
## AIC: 3611.7
##
## Number of Fisher Scoring iterations: 5
## plot results
## get fixed effects coefficients and their confidence intervals
fix.eff <- summary(get(best.model))$coefficients
# create a data frame
fix.eff_df <- as.data.frame(fix.eff)
fix.eff_df$term <- rownames(fix.eff_df)
## plot coefficients
ggplot(fix.eff_df, aes(x = term, y = Estimate)) +
geom_point() +
geom_errorbar(aes(ymin = Estimate - 1.96 * `Std. Error`,
ymax = Estimate + 1.96 * `Std. Error`), width = 0.2) +
theme_minimal() +
labs(title = "Fixed Effects Estimates", x = "Predictor", y = "Estimate") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## glm with taxa as a random variable
glm.res.5 <- glmer(NumbSp ~ freqlog + distlog + (1|class),
family = "poisson"(link ="log"), data = prov.comb)
summary(glm.res.5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: NumbSp ~ freqlog + distlog + (1 | class)
## Data: prov.comb
##
## AIC BIC logLik deviance df.resid
## 3647.4 3658.2 -1819.7 3639.4 108
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.9419 -3.1795 -0.6166 2.4163 19.6111
##
## Random effects:
## Groups Name Variance Std.Dev.
## class (Intercept) 0.4187 0.6471
## Number of obs: 112, groups: class, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.166851 0.268727 15.506 < 2e-16 ***
## freqlog -0.048435 0.007397 -6.548 5.84e-11 ***
## distlog -0.043257 0.012577 -3.439 0.000583 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) freqlg
## freqlog -0.087
## distlog -0.400 -0.006
## get R^2
## marginal accounts for proportion of variance explained only by the fixed effects
## conditional accounts for proportion of variance explained by the entire mode
rsquared(glm.res.5, method="trigamma")
## plot results
## get fixed effects coefficients and their confidence intervals
fix.eff <- summary(glm.res.5)$coefficients
# create a data frame
fix.eff_df <- as.data.frame(fix.eff)
fix.eff_df$term <- rownames(fix.eff_df)
## plot coefficients
ggplot(fix.eff_df, aes(x = term, y = Estimate)) +
geom_point() +
geom_errorbar(aes(ymin = Estimate - 1.96 * `Std. Error`,
ymax = Estimate + 1.96 * `Std. Error`), width = 0.2) +
theme_minimal() +
labs(title = "Fixed Effects Estimates", x = "Predictor", y = "Estimate") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## use species richness per point
#plot data
p1 <- ggplot(perpoint.comb, aes(x = meanDistance, y = NumbSp)) +
geom_point(pch = 1) + geom_smooth(method = lm) +
ylab("Number of species") +
xlab("Average distance (m)") +
labs(title = "Richness vs. average distance")
p2 <- ggplot(perpoint.comb, aes(x = conflictFreq, y = NumbSp)) +
geom_point(pch = 1) + geom_smooth(method = lm) +
ylab("Number of species") +
xlab("Number of conflict") +
labs(title = "Richness vs. conflict frequency")
## combine plots
comb.plot <- (p1 + p2 +
plot_annotation(title = ""))
comb.plot <- comb.plot +
plot_layout(guides = "collect") &
theme(legend.position = "right")
comb.plot
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## assign taxa as a factor
perpoint.comb$class <- factor(perpoint.comb$class)
## transform data
perpoint.comb$distlog <- log(perpoint.comb$meanDistance + 1)
perpoint.comb$freqlog <- log(perpoint.comb$conflictFreq + 1)
## perform glm
glm.res.1 <- glm(NumbSp ~ freqlog + distlog + class,
family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.2 <- glm(NumbSp ~ freqlog + class,
family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.3 <- glm(NumbSp ~ distlog + class,
family = "poisson"(link ="log"), data = perpoint.comb)
glm.res.4 <- glm(NumbSp ~ freqlog + distlog,
family = "poisson"(link ="log"), data = perpoint.comb)
## compare glm results
aic.values <- AIC(glm.res.1, glm.res.2, glm.res.3, glm.res.4)
aic.values
## identify the model with the lowest AIC
best.model <- rownames(aic.values)[which.min(aic.values$AIC)]
best.model
## [1] "glm.res.2"
stepAIC(glm.res.1, direction = 'both') #use stepAIC function
## Start: AIC=11573.48
## NumbSp ~ freqlog + distlog + class
##
## Df Deviance AIC
## - distlog 1 9650.3 11572
## <none> 9649.8 11574
## - freqlog 1 9734.6 11656
## - class 6 12423.3 14335
##
## Step: AIC=11571.98
## NumbSp ~ freqlog + class
##
## Df Deviance AIC
## <none> 9650.3 11572
## + distlog 1 9649.8 11574
## - freqlog 1 9735.8 11656
## - class 6 12459.8 14370
##
## Call: glm(formula = NumbSp ~ freqlog + class, family = poisson(link = "log"),
## data = perpoint.comb)
##
## Coefficients:
## (Intercept) freqlog classAmphibia classArachnida classAves
## 2.67229 -0.05869 0.33428 -0.05851 1.05117
## classInsecta classMammalia classReptilia
## 1.04212 -0.63838 0.42480
##
## Degrees of Freedom: 457 Total (i.e. Null); 450 Residual
## Null Deviance: 12510
## Residual Deviance: 9650 AIC: 11570
#get summary for best model
summary(get(best.model))
##
## Call:
## glm(formula = NumbSp ~ freqlog + class, family = poisson(link = "log"),
## data = perpoint.comb)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.672293 0.054198 49.306 < 2e-16 ***
## freqlog -0.058693 0.006224 -9.430 < 2e-16 ***
## classAmphibia 0.334275 0.059180 5.648 1.62e-08 ***
## classArachnida -0.058513 0.061455 -0.952 0.341
## classAves 1.051174 0.052513 20.017 < 2e-16 ***
## classInsecta 1.042122 0.051502 20.235 < 2e-16 ***
## classMammalia -0.638384 0.067229 -9.496 < 2e-16 ***
## classReptilia 0.424802 0.058706 7.236 4.62e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 12508.2 on 457 degrees of freedom
## Residual deviance: 9650.3 on 450 degrees of freedom
## AIC: 11572
##
## Number of Fisher Scoring iterations: 6
## plot results
## get fixed effects coefficients and their confidence intervals
fix.eff <- summary(get(best.model))$coefficients
# create a data frame
fix.eff_df <- as.data.frame(fix.eff)
fix.eff_df$term <- rownames(fix.eff_df)
## plot coefficients
ggplot(fix.eff_df, aes(x = term, y = Estimate)) +
geom_point() +
geom_errorbar(aes(ymin = Estimate - 1.96 * `Std. Error`,
ymax = Estimate + 1.96 * `Std. Error`), width = 0.2) +
theme_minimal() +
labs(title = "Fixed Effects Estimates", x = "Predictor", y = "Estimate") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## glm with taxa as a random variable
glm.res.5 <- glmer(NumbSp ~ freqlog + distlog + (1|class),
family = "poisson"(link = "log"), data = perpoint.comb)
summary(glm.res.5)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: NumbSp ~ freqlog + distlog + (1 | class)
## Data: perpoint.comb
##
## AIC BIC logLik deviance df.resid
## 11610.8 11627.3 -5801.4 11602.8 454
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.247 -2.944 -1.306 0.986 36.119
##
## Random effects:
## Groups Name Variance Std.Dev.
## class (Intercept) 0.3172 0.5632
## Number of obs: 458, groups: class, 7
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.025796 0.223254 13.553 <2e-16 ***
## freqlog -0.058451 0.006229 -9.384 <2e-16 ***
## distlog -0.005461 0.007561 -0.722 0.47
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) freqlg
## freqlog -0.082
## distlog -0.280 -0.049
## get R^2
rsquared(glm.res.5, method = "trigamma")
## plot results
## get fixed effects coefficients and their confidence intervals
fix.eff <- summary(glm.res.5)$coefficients
# create a data frame
fix.eff_df <- as.data.frame(fix.eff)
fix.eff_df$term <- rownames(fix.eff_df)
## plot coefficients
ggplot(fix.eff_df, aes(x = term, y = Estimate)) +
geom_point() +
geom_errorbar(aes(ymin = Estimate - 1.96 * `Std. Error`,
ymax = Estimate + 1.96 * `Std. Error`), width = 0.2) +
theme_minimal() +
labs(title = "Fixed Effects Estimates", x = "Predictor", y = "Estimate") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Calculate proportion of species occurrence records and conflict sites positioned in different land cover types
## here we calculate the number of sites found in different land cover types
## this demonstrates the artifact of sampling, which was not considered in the paper
## land cover data of 1988 was from Swedish Space Corporation (SSC 1988)
## land cover of 2020 was from the Philippine National Mapping and Resource Information Authority
## data was retrieved from geoportal PH (https://www.geoportal.gov.ph/)
## we used land cover as a proxy for tree density and forest canopy data
## the national land cover data is ground-thruthed
## this is only for simplistic illustration of sampling artifact
## the original land cover format is shapefile
## we converted it to raster file for fast data processing
## data conversion was done in QGIS using the plugin "rasterize"
## raster pixel size is 0.008 or approximately 1000 meters
## all raster files have the uniform coordinate reference system, resolution and extent
## here is the 2020 land cover of Mindanao
knitr::include_graphics("images/lc_2020.png")
## read data
files <- list.files("data/landcover/", pattern = "tif", full.names=TRUE)
raster.files <- lapply(files, raster)
raster.files
## [[1]]
## class : RasterLayer
## dimensions : 736, 903, 664608 (nrow, ncol, ncell)
## resolution : 0.008000454, 0.007996592 (x, y)
## extent : 119.3807, 126.6051, 4.58649, 10.47198 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : 1988_lc_rast1km.tif
## names : X1988_lc_rast1km
##
##
## [[2]]
## class : RasterLayer
## dimensions : 736, 903, 664608 (nrow, ncol, ncell)
## resolution : 0.008000454, 0.007996592 (x, y)
## extent : 119.3807, 126.6051, 4.58649, 10.47198 (xmin, xmax, ymin, ymax)
## crs : +proj=longlat +datum=WGS84 +no_defs
## source : 2020_lc_rast1km.tif
## names : X2020_lc_rast1km
## make rasters to have common extent
common_extent <- do.call(union, lapply(raster.files, extent))
raster.files <- lapply(raster.files, function(r) {
extend(r, common_extent)
extent(r) <- common_extent
r
})
## check extent
lapply(raster.files, extent)
## [[1]]
## class : Extent
## xmin : 119.3807
## xmax : 126.6051
## ymin : 4.58649
## ymax : 10.47198
##
## [[2]]
## class : Extent
## xmin : 119.3807
## xmax : 126.6051
## ymin : 4.58649
## ymax : 10.47198
## resample raster files to have a uniform dimension/resolution
standard <- raster.files[[1]]
raster.files <- lapply(raster.files, function(r) {
resample(r, standard, method = "bilinear")
})
## check resolution
lapply(raster.files, res)
## [[1]]
## [1] 0.008000454 0.007996592
##
## [[2]]
## [1] 0.008000454 0.007996592
## stack raster files
lc <- stack(raster.files)
## plot data
plot(lc$X1988_lc_rast1km)
points(con.sub$longitude, con.sub$latitude)
con.coor <- cbind(con.sub$longitude, con.sub$latitude)
## extract data from land cover using conflict data coordinates
con.lc.data <- extract(lc, con.coor)
colnames(con.lc.data)[1] <- "LC_1988"
colnames(con.lc.data)[2] <- "LC_2020"
con.lc.data <- as.data.frame(na.omit(con.lc.data))
con.lc.data$LC_1988 <- as.integer(con.lc.data$LC_1988)
con.lc.data$LC_2020 <- as.integer(con.lc.data$LC_2020)
head(con.lc.data)
## read land cover types code
lc.88.d <- read.csv("data/1988_lc_classmap.csv")
lc.88.d
lc.20.d <- read.csv("data/2020_lc_classmap.csv")
lc.20.d
## add land cover descriptions
con.lc.data.m1 <- con.lc.data %>% mutate(class_1988 = if_else(LC_1988 == 1, "Closed Canopy",
if_else(LC_1988 == 2,"Cultivated Area mixed with brushland/grassland",
if_else(LC_1988 == 3, "Crop land mixed with coconut plantation",
if_else(LC_1988 == 4, "Unclassified",
if_else(LC_1988 == 5, "Open Canopy",
if_else(LC_1988 == 6, "Arable land, crops mainly cereals and sugar",
if_else(LC_1988 == 7, "Fishponds derived from mangrove",
if_else(LC_1988 == 8, "Coral Reef",
if_else(LC_1988 == 9, "Grassland, grass covering > 70 percent",
if_else(LC_1988 == 10, "Lake",
if_else(LC_1988 == 11, "Mangrove vegetation",
if_else(LC_1988 == 12, "Built-up Area",
if_else(LC_1988 == 13, "Quarry",
if_else(LC_1988 == 14, "Coconut plantations",
if_else(LC_1988 == 15, "Siltation pattern in lake",
if_else(LC_1988 == 16, "Marshy area and swamp",
if_else(LC_1988 == 17, "Riverbeds",
if_else(LC_1988 == 18, "Crop land mixed with other plantation",
if_else(LC_1988 == 19, "Other plantations", "No data"))))))))))))))))))))
con.lc.data.m2 <- con.lc.data %>% mutate(con.lc.data.m1,
class_2020 = if_else(LC_2020 == 1, "Grassland",
if_else(LC_2020 == 2,"Annual Crop",
if_else(LC_2020 == 3, "Marshland/Swamp",
if_else(LC_2020 == 4, "Perennial Crop",
if_else(LC_2020 == 5, "Built-up",
if_else(LC_2020 == 6, "Fishpond",
if_else(LC_2020 == 7, "Inland Water",
if_else(LC_2020 == 8, "Mangrove Forest",
if_else(LC_2020 == 9, "Brush/Shrubs",
if_else(LC_2020 == 10, "Closed Forest",
if_else(LC_2020 == 11, "Open Forest",
if_else(LC_2020 == 12, "Open/Barren", "No data")))))))))))))
## count the number of conflict sites in each land cover type
## 1988 data
con.lc.count.88 <- con.lc.data.m2 %>%
group_by(class_1988) %>%
summarise(n())
colnames(con.lc.count.88)[2] <- "count"
con.lc.count.88
ggplot(data=con.lc.count.88, aes(x=reorder(class_1988, -count), y=count)) +
geom_bar(stat="identity", fill="#5794db") +
guides(x = guide_axis(angle = 75)) +
xlab("1988 LC Type") +
ylab("Count") +
labs(title = "Conflict count per LC type (1988)")
con.lc.count.88 <- as.data.frame(con.lc.count.88)
write.csv(con.lc.count.88, "con.lc.count.88.csv")
## 2020 data
con.lc.count.20 <- con.lc.data.m2 %>%
group_by(class_2020) %>%
summarise(n())
colnames(con.lc.count.20)[2] <- "count"
con.lc.count.20
ggplot(data=con.lc.count.20, aes(x=reorder(class_2020, -count), y=count)) +
geom_bar(stat="identity", fill="#5794db") +
guides(x = guide_axis(angle = 75)) +
xlab("2020 LC Type") +
ylab("Count") +
labs(title = "Conflict count per LC type (2020)")
con.lc.count.20 <- as.data.frame(con.lc.count.20)
write.csv(con.lc.count.20, "con.lc.count.20.csv")
## extract data from land cover using biodiversity data coordinates
bio.coor <- cbind(bio.sub$decimalLongitude, bio.sub$decimalLatitude)
bio.lc.data <- extract(lc, bio.coor)
colnames(bio.lc.data )[1] <- "LC_1988"
colnames(bio.lc.data )[2] <- "LC_2020"
bio.lc.data <- as.data.frame(na.omit(bio.lc.data))
bio.lc.data$LC_1988 <- as.integer(bio.lc.data$LC_1988)
bio.lc.data$LC_2020 <- as.integer(bio.lc.data$LC_2020)
head(bio.lc.data)
## add land cover descriptions
bio.lc.data.m1 <- bio.lc.data %>% mutate(class_1988 = if_else(LC_1988 == 1, "Closed Canopy",
if_else(LC_1988 == 2,"Cultivated Area mixed with brushland/grassland",
if_else(LC_1988 == 3, "Crop land mixed with coconut plantation",
if_else(LC_1988 == 4, "Unclassified",
if_else(LC_1988 == 5, "Open Canopy",
if_else(LC_1988 == 6, "Arable land, crops mainly cereals and sugar",
if_else(LC_1988 == 7, "Fishponds derived from mangrove",
if_else(LC_1988 == 8, "Coral Reef",
if_else(LC_1988 == 9, "Grassland, grass covering > 70 percent",
if_else(LC_1988 == 10, "Lake",
if_else(LC_1988 == 11, "Mangrove vegetation",
if_else(LC_1988 == 12, "Built-up Area",
if_else(LC_1988 == 13, "Quarry",
if_else(LC_1988 == 14, "Coconut plantations",
if_else(LC_1988 == 15, "Siltation pattern in lake",
if_else(LC_1988 == 16, "Marshy area and swamp",
if_else(LC_1988 == 17, "Riverbeds",
if_else(LC_1988 == 18, "Crop land mixed with other plantation",
if_else(LC_1988 == 19, "Other plantations", "No data"))))))))))))))))))))
bio.lc.data.m2<- bio.lc.data %>% mutate(bio.lc.data.m1,
class_2020 = if_else(LC_2020 == 1, "Grassland",
if_else(LC_2020 == 2,"Annual Crop",
if_else(LC_2020 == 3, "Marshland/Swamp",
if_else(LC_2020 == 4, "Perennial Crop",
if_else(LC_2020 == 5, "Built-up",
if_else(LC_2020 == 6, "Fishpond",
if_else(LC_2020 == 7, "Inland Water",
if_else(LC_2020 == 8, "Mangrove Forest",
if_else(LC_2020 == 9, "Brush/Shrubs",
if_else(LC_2020 == 10, "Closed Forest",
if_else(LC_2020 == 11, "Open Forest",
if_else(LC_2020 == 12, "Open/Barren", "No data")))))))))))))
## count the number of biodiversity points in each land cover type
## 1988 data
bio.lc.count.88 <- bio.lc.data.m2 %>%
group_by(class_1988) %>%
summarise(n())
colnames(bio.lc.count.88)[2] <- "count"
bio.lc.count.88
ggplot(data=bio.lc.count.88, aes(x=reorder(class_1988, -count), y=count)) +
geom_bar(stat="identity", fill="#5794db") +
guides(x = guide_axis(angle = 75)) +
xlab("1988 LC Type") +
ylab("Count") +
labs(title = "Biodiversity data count per LC type (1988)")
bio.lc.count.88 <- as.data.frame(bio.lc.count.88)
write.csv(bio.lc.count.88, "bio.lc.count.88.csv")
## 2020 data
bio.lc.count.20 <- bio.lc.data.m2 %>%
group_by(class_2020) %>%
summarise(n())
colnames(bio.lc.count.20)[2] <- "count"
bio.lc.count.20
ggplot(data=bio.lc.count.20, aes(x=reorder(class_2020, -count), y=count)) +
geom_bar(stat="identity", fill="#5794db") +
guides(x = guide_axis(angle = 75)) +
xlab("2020 LC Type") +
ylab("Count") +
labs(title = "Biodiversity data count per LC type (2020)")
bio.lc.count.20 <- as.data.frame(bio.lc.count.20)
write.csv(bio.lc.count.20, "bio.lc.count.20.csv")